import os
import numpy as np
import pandas as pd
import random
import seaborn as sns
import datetime as datetime
import matplotlib.dates as dates
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from contextlib import contextmanager
from time import time
from tqdm import tqdm
import lightgbm as lgbm
from sklearn.metrics import classification_report, log_loss, accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
pip install seaborn
Collecting seaborn Downloading seaborn-0.11.2-py3-none-any.whl (292 kB) Requirement already satisfied: pandas>=0.23 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from seaborn) (1.4.2) Requirement already satisfied: matplotlib>=2.2 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from seaborn) (3.5.1) Requirement already satisfied: scipy>=1.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from seaborn) (1.8.0) Requirement already satisfied: numpy>=1.15 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from seaborn) (1.22.3) Requirement already satisfied: fonttools>=4.22.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (4.31.2) Requirement already satisfied: pyparsing>=2.2.1 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (3.0.4) Requirement already satisfied: cycler>=0.10 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (0.11.0) Requirement already satisfied: python-dateutil>=2.7 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (2.8.2) Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (1.4.2) Requirement already satisfied: pillow>=6.2.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (9.1.0) Requirement already satisfied: packaging>=20.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (21.3) Requirement already satisfied: pytz>=2020.1 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from pandas>=0.23->seaborn) (2022.1) Requirement already satisfied: six>=1.5 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0) Installing collected packages: seaborn Successfully installed seaborn-0.11.2 Note: you may 
need to restart the kernel to use updated packages.
pip install tqdm
Collecting tqdm Downloading tqdm-4.63.1-py2.py3-none-any.whl (76 kB) Requirement already satisfied: colorama in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from tqdm) (0.4.4) Installing collected packages: tqdm Successfully installed tqdm-4.63.1 Note: you may need to restart the kernel to use updated packages.
pip install lightgbm
Collecting lightgbm Downloading lightgbm-3.3.2-py3-none-win_amd64.whl (1.0 MB) Requirement already satisfied: scikit-learn!=0.22.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from lightgbm) (1.0.2) Requirement already satisfied: numpy in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from lightgbm) (1.22.3) Requirement already satisfied: scipy in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from lightgbm) (1.8.0) Requirement already satisfied: wheel in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from lightgbm) (0.37.1) Requirement already satisfied: joblib>=0.11 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from scikit-learn!=0.22.0->lightgbm) (1.1.0) Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from scikit-learn!=0.22.0->lightgbm) (3.1.0) Installing collected packages: lightgbm Successfully installed lightgbm-3.3.2 Note: you may need to restart the kernel to use updated packages.
# Load the DJIA price table from the working directory.
data = pd.read_csv(filepath_or_buffer="upload_DJIA_table.csv")
# Bare expression: shows the frame when run as a notebook cell.
data
| | Date | Open | High | Low | Close | Volume | Adj Close |
|---|---|---|---|---|---|---|---|
| 0 | 2016-07-01 | 17924.240234 | 18002.380859 | 17916.910156 | 17949.369141 | 82160000 | 17949.369141 |
| 1 | 2016-06-30 | 17712.759766 | 17930.609375 | 17711.800781 | 17929.990234 | 133030000 | 17929.990234 |
| 2 | 2016-06-29 | 17456.019531 | 17704.509766 | 17456.019531 | 17694.679688 | 106380000 | 17694.679688 |
| 3 | 2016-06-28 | 17190.509766 | 17409.720703 | 17190.509766 | 17409.720703 | 112190000 | 17409.720703 |
| 4 | 2016-06-27 | 17355.210938 | 17355.210938 | 17063.080078 | 17140.240234 | 138740000 | 17140.240234 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1984 | 2008-08-14 | 11532.070312 | 11718.280273 | 11450.889648 | 11615.929688 | 159790000 | 11615.929688 |
| 1985 | 2008-08-13 | 11632.809570 | 11633.780273 | 11453.339844 | 11532.959961 | 182550000 | 11532.959961 |
| 1986 | 2008-08-12 | 11781.700195 | 11782.349609 | 11601.519531 | 11642.469727 | 173590000 | 11642.469727 |
| 1987 | 2008-08-11 | 11729.669922 | 11867.110352 | 11675.530273 | 11782.349609 | 183190000 | 11782.349609 |
| 1988 | 2008-08-08 | 11432.089844 | 11759.959961 | 11388.040039 | 11734.320312 | 212830000 | 11734.320312 |
1989 rows × 7 columns
# Turn the date strings into datetime values so ordering is chronological.
data['Date'] = pd.to_datetime(data['Date'])
# Reorder oldest-first and renumber the rows from 0.
data = (
    data
    .sort_values(by='Date')
    .reset_index(drop=True)
)
data
| | Date | Open | High | Low | Close | Volume | Adj Close |
|---|---|---|---|---|---|---|---|
| 0 | 2008-08-08 | 11432.089844 | 11759.959961 | 11388.040039 | 11734.320312 | 212830000 | 11734.320312 |
| 1 | 2008-08-11 | 11729.669922 | 11867.110352 | 11675.530273 | 11782.349609 | 183190000 | 11782.349609 |
| 2 | 2008-08-12 | 11781.700195 | 11782.349609 | 11601.519531 | 11642.469727 | 173590000 | 11642.469727 |
| 3 | 2008-08-13 | 11632.809570 | 11633.780273 | 11453.339844 | 11532.959961 | 182550000 | 11532.959961 |
| 4 | 2008-08-14 | 11532.070312 | 11718.280273 | 11450.889648 | 11615.929688 | 159790000 | 11615.929688 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1984 | 2016-06-27 | 17355.210938 | 17355.210938 | 17063.080078 | 17140.240234 | 138740000 | 17140.240234 |
| 1985 | 2016-06-28 | 17190.509766 | 17409.720703 | 17190.509766 | 17409.720703 | 112190000 | 17409.720703 |
| 1986 | 2016-06-29 | 17456.019531 | 17704.509766 | 17456.019531 | 17694.679688 | 106380000 | 17694.679688 |
| 1987 | 2016-06-30 | 17712.759766 | 17930.609375 | 17711.800781 | 17929.990234 | 133030000 | 17929.990234 |
| 1988 | 2016-07-01 | 17924.240234 | 18002.380859 | 17916.910156 | 17949.369141 | 82160000 | 17949.369141 |
1989 rows × 7 columns
# Take an independent copy of the two columns we need. Assigning a new column
# to a plain column selection of `data` raises pandas' SettingWithCopyWarning.
data0 = data[['Date', 'Open']].copy()
# Trailing 7-row moving average of the opening price; the first 6 rows have
# fewer than 7 observations and are therefore NaN.
data0['Open 7-day'] = data0['Open'].rolling(window=7).mean()
C:\Users\krudko\AppData\Local\Temp\ipykernel_1128\1706044499.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data0['Open 7-day']=data0['Open'].rolling(window=7).mean()
# Single-panel figure of the 7-day moving average of the opening price.
fig = make_subplots(specs=[[{"secondary_y": False}]])
fig.add_trace(
    go.Scatter(
        x=data0['Date'],
        y=data0['Open 7-day'],
        name='Open mean 7-day',
    ),
    secondary_y=False,
)
# Axis titles.
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Price", secondary_y=False)
# Fixed canvas size and figure title.
fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="DJIA Stocks Open mean 7-day",
)
fig.show()
# Existing column names of data0, followed by two columns to be filled later.
col0 = list(data0.columns)
col1 = [*col0, 'O7d 7d-before', 'slope']
# Start from an empty frame that already has the full column layout, then
# copy the data0 columns in; the two extra columns stay empty (NaN).
data1 = pd.DataFrame(columns=col1)
data1[col0] = data0
data1
| | Date | Open | Open 7-day | O7d 7d-before | slope |
|---|---|---|---|---|---|
| 0 | 2008-08-08 | 11432.089844 | NaN | NaN | NaN |
| 1 | 2008-08-11 | 11729.669922 | NaN | NaN | NaN |
| 2 | 2008-08-12 | 11781.700195 | NaN | NaN | NaN |
| 3 | 2008-08-13 | 11632.809570 | NaN | NaN | NaN |
| 4 | 2008-08-14 | 11532.070312 | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... |
| 1984 | 2016-06-27 | 17355.210938 | 17753.751395 | NaN | NaN |
| 1985 | 2016-06-28 | 17190.509766 | 17676.190011 | NaN | NaN |
| 1986 | 2016-06-29 | 17456.019531 | 17636.068638 | NaN | NaN |
| 1987 | 2016-06-30 | 17712.759766 | 17619.701451 | NaN | NaN |
| 1988 | 2016-07-01 | 17924.240234 | 17632.782924 | NaN | NaN |
1989 rows × 5 columns
n = len(data0)
# Lag the 7-day mean by 7 rows so each row also carries the value from 7 rows
# earlier. The frame is first extended by 7 empty rows (labels n .. n+6) so the
# last 7 values are kept, which matches the row-by-row `.loc` enlargement this
# replaces; the trailing rows are trimmed again downstream.
# Assumes data1 is indexed 0 .. n-1, as produced by the reset_index above.
data1 = data1.reindex(range(n + 7))
# One vectorized shift instead of a Python loop; the column is float64.
data1['O7d 7d-before'] = data1['Open 7-day'].shift(7)
data1
| | Date | Open | Open 7-day | O7d 7d-before | slope |
|---|---|---|---|---|---|
| 0 | 2008-08-08 | 11432.089844 | NaN | NaN | NaN |
| 1 | 2008-08-11 | 11729.669922 | NaN | NaN | NaN |
| 2 | 2008-08-12 | 11781.700195 | NaN | NaN | NaN |
| 3 | 2008-08-13 | 11632.809570 | NaN | NaN | NaN |
| 4 | 2008-08-14 | 11532.070312 | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... |
| 1991 | NaT | NaN | NaN | 17753.751395 | NaN |
| 1992 | NaT | NaN | NaN | 17676.190011 | NaN |
| 1993 | NaT | NaN | NaN | 17636.068638 | NaN |
| 1994 | NaT | NaN | NaN | 17619.701451 | NaN |
| 1995 | NaT | NaN | NaN | 17632.782924 | NaN |
1996 rows × 5 columns
# Change of the 7-day mean relative to its value 7 rows earlier, per row.
data1['slope'] = (data1['Open 7-day'] - data1['O7d 7d-before']) / 7
# Keep only the columns needed for the slope analysis.
data2 = data1[
    ['Date', 'Open 7-day', 'O7d 7d-before', 'slope']
]
# Positional trim: skip the first 14 rows and the final 7 rows (the trailing
# rows are the date-less padding rows created when the mean was lagged).
data3 = data2.iloc[14:-7]
data3
| | Date | Open 7-day | O7d 7d-before | slope |
|---|---|---|---|---|
| 14 | 2008-08-28 | 11444.291574 | 11632.171456 | -26.839983 |
| 15 | 2008-08-29 | 11496.761579 | 11577.352958 | -11.513054 |
| 16 | 2008-09-02 | 11515.390067 | 11525.00014 | -1.372868 |
| 17 | 2008-09-03 | 11526.707171 | 11495.568778 | 4.448342 |
| 18 | 2008-09-04 | 11513.320034 | 11509.014509 | 0.615075 |
| ... | ... | ... | ... | ... |
| 1984 | 2016-06-27 | 17753.751395 | 17812.551618 | -8.400032 |
| 1985 | 2016-06-28 | 17676.190011 | 17784.198661 | -15.429807 |
| 1986 | 2016-06-29 | 17636.068638 | 17750.897042 | -16.404058 |
| 1987 | 2016-06-30 | 17619.701451 | 17734.969866 | -16.466916 |
| 1988 | 2016-07-01 | 17632.782924 | 17735.279855 | -14.642419 |
1975 rows × 4 columns
# Figure of the slope series. A secondary y-axis is declared and titled,
# but only the primary axis receives a trace in this cell.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x=data3['Date'],
        y=data3['slope'],
        name='slope',
    ),
    secondary_y=False,
)
# Axis titles.
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Slope", secondary_y=False)
fig.update_yaxes(title_text="Slope change", secondary_y=True)
# Fixed canvas size and figure title.
fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="DJIA Stocks Slope change",
)
fig.show()
# Load the Reddit headlines from the working directory.
news = pd.read_csv(filepath_or_buffer='RedditNews.csv')
# Parse the dates so they sort chronologically.
news['Date'] = pd.to_datetime(news['Date'])
# Oldest headlines first, rows renumbered from 0.
news = (
    news
    .sort_values(by='Date')
    .reset_index(drop=True)
)
news
| | Date | News |
|---|---|---|
| 0 | 2008-06-08 | b'Marriage, they said, was reduced to the stat... |
| 1 | 2008-06-08 | b'Nim Chimpsky: The tragedy of the chimp who t... |
| 2 | 2008-06-08 | b"Canada: Beware slippery slope' to censorship... |
| 3 | 2008-06-08 | b'EU Vice-President Luisa Morgantini and the I... |
| 4 | 2008-06-08 | b"Israeli minister: Israel will attack Iran if... |
| ... | ... | ... |
| 73603 | 2016-07-01 | Venezuela, where anger over food shortages is ... |
| 73604 | 2016-07-01 | A Hindu temple worker has been killed by three... |
| 73605 | 2016-07-01 | Ozone layer hole seems to be healing - US &... |
| 73606 | 2016-07-01 | Taiwanese warship accidentally fires missile t... |
| 73607 | 2016-07-01 | A 117-year-old woman in Mexico City finally re... |
73608 rows × 2 columns
# Rows where the slope is below -120 (the steepest declines of the 7-day mean).
data3.loc[data3['slope'] < -120]
| | Date | Open 7-day | O7d 7d-before | slope |
|---|---|---|---|---|
| 44 | 2008-10-10 | 9836.432896 | 10867.217215 | -147.254903 |
| 45 | 2008-10-13 | 9498.844308 | 10840.052874 | -191.601224 |
| 46 | 2008-10-14 | 9342.417132 | 10787.75865 | -206.47736 |
| 47 | 2008-10-15 | 9196.615793 | 10715.665737 | -217.007135 |
| 48 | 2008-10-16 | 8999.704381 | 10563.720006 | -223.430804 |
| 49 | 2008-10-17 | 8933.721401 | 10320.521484 | -198.114298 |
| 50 | 2008-10-20 | 8875.237026 | 10161.96582 | -183.818399 |
| 51 | 2008-10-21 | 8974.398438 | 9836.432896 | -123.14778 |
| 756 | 2011-08-09 | 11665.772880 | 12514.731445 | -121.279795 |
| 757 | 2011-08-10 | 11534.884347 | 12454.324219 | -131.348553 |
| 758 | 2011-08-11 | 11334.895787 | 12369.332729 | -147.776706 |
| 759 | 2011-08-12 | 11231.998605 | 12252.76423 | -145.823661 |
| 760 | 2011-08-15 | 11142.854213 | 12153.012835 | -144.308375 |
| 1775 | 2015-08-27 | 16592.648437 | 17453.345703 | -122.956752 |
| 1776 | 2015-08-28 | 16469.888393 | 17472.425781 | -143.219627 |
| 1777 | 2015-08-31 | 16367.988281 | 17436.958705 | -152.710061 |
| 1778 | 2015-09-01 | 16301.893973 | 17380.924386 | -154.147202 |
| 1779 | 2015-09-02 | 16244.551060 | 17246.368582 | -143.116789 |
| 1871 | 2016-01-14 | 16575.117048 | 17504.552734 | -132.776527 |
| 1872 | 2016-01-15 | 16460.759905 | 17441.919922 | -140.165717 |
| 1873 | 2016-01-19 | 16335.201451 | 17349.448382 | -144.892419 |
| 1874 | 2016-01-20 | 16259.527204 | 17202.562779 | -134.719368 |
# Headlines dated 2008-10-08, printed one per paragraph.
news1 = news[news['Date'] == '2008-10-08']['News'].tolist()
for headline in news1:
    # Some rows hold the text of a bytes literal (b'...' or b"..."). Drop only
    # that leading "b" so plain-text headlines keep their first character.
    if headline.startswith(("b'", 'b"')):
        headline = headline[1:]
    print(headline)
    print()
'NYTimes: U.S. Inquiry Is Said to Conclude 30 Civilians Died in Afghan Raid ' 'Modern slavery in Dubai' 'Brazil and Argentina, two biggest economies in South America, Monday launched a new payment system of bilateral transaction with their local currencies, aimed at eliminating the U.S. dollar ' "The British government's plan for teachers to monitor their pupils for signs of potential extremism only stifles debate and encourages secrecy." 'Attack on U.S. Embassy in Yemen Linked to Israeli Mossad' 'Russian President Dmitry Medvedev has started his own video blog ' "China's super-wealthy lose a third of their wealth over night" 'Chinese Space Walk Filmed in Water' 'UK announces 50bn ($88bn) rescue plan' '14-year-old Iowa girl abandoned under Nebraska law' ' The Matrix of Death. A New Dossier on the (Im)Precision of U.S Bombing and the (Under)Valuation of an Afghan Life' 'Russia to cut military personnel to 1 mln by 2012' 'A bunch of multinationals have figured out how to make their pollution-based businesses seem like the solution to the climate crisis.' 'World economic crisis deepens: Nikkei sinks 9%, Russian exchange was shut down after a huge decline at the open, U.K. bailing out banks' "Anger over Baader-Meinhof Biopic: Victims' Families in Uproar over New German Terrorism Film" 'Is CNN Biased In This Presidential Race? I have been watching and testing' 'China reluctant to reveal tainted milk figures' 'There Will Be No Lasting Peace without the Taliban' "Hindu mobs in Indian province burning and killing Christians and Muslims...why isn't this in the news?" "English woman fights to use dead husband's sperm" 'UK banks are now part nationalised as Government injects 500 billion' 'The latest conflict simmering between Lebanon and Israel is all about food: Lebanese businessmen accusing Israel of stealing traditional Middle Eastern dishes like hummus' 'Why are Afghan criminal networks secretly stockpiling enough heroin to supply every junkie on the planet? 
And where is it?' 'The assassination of Salvador Vergara Cruz, mayor of a Mexican resort town, may represent a turning point in how Mexico deals with its drug trafficking problem.' 'China: No more Western religious music in concert halls.'
# Headlines dated 2008-10-09, printed one per paragraph.
news1 = news[news['Date'] == '2008-10-09']['News'].tolist()
for headline in news1:
    # Some rows hold the text of a bytes literal (b'...' or b"..."). Drop only
    # that leading "b" so plain-text headlines keep their first character.
    if headline.startswith(("b'", 'b"')):
        headline = headline[1:]
    print(headline)
    print()
'"Sovietology, like paranoia, is a very dangerous disease, and it is a pity that part of the U.S. administration still suffers from it," Medvedev said.' 'Jews were never exiled from the Holy Land, most of todays Jews have no historical connection to "Israel"; the only political solution to conflict with the Palestinians is to abolish the Jewish state.' 'Prior to the tyrannical theft of Palestine via the Balfour Declaration issued to Lord Rothschild the Middle East had been a relatively peaceful area.' 'Icelandic Regulator Takes Control of Kaupthing Bank ' 'Russian president Dmitry Medvedev calls on European leaders to create a new world order that minimises the role of the US' 'China milk victims may have doubled to over 90,000' 'U.K. Uses Anti-Terrorism Law to Seize Icelandic Bank Assets ' 'Mexico: Government agents killed 4 to 6 people in Chiapas, 3 of them mafia execution-style. Residents respond by briefly capturing 77 government agents.' 'A 14 year old schoolboy posed as a female British secret service spy in an internet chatroom to persuade a 16 year old friend to try to murder him' 'The Bush administration this month is quietly cutting off birth control supplies to some of the worlds poorest women in Africa.' 'Enraged neighbors briefly captured 77 police officers using nothing but sticks and traditional machetes.' 'From Germany to Guantanamo: The Career of Prisoner No. 760 believed to have provided aid to the Sept. 11 attackers, and the confessions extracted from him by torture could collapse in court. ' 'IMF: World on Brink of Recession' 'Military Justifies Attack That Killed at Least 33 Afghan Civilians (including 12 children)' "Why Iceland's economy collapsed" ' North Korea said to be deploying missiles' 'Blast rocks Pakistan capital: At least 12 people are feared dead in a suspected car bombing at police headquarters' 'U.S. 
report warns of crisis in Afghanistan' 'Russia Pulls Out From Georgia Buffer Zones' 'You too can stranglehold your opponents like Vladimir Putin ...' 'Terror law used for Iceland deposits' 'New World Order: Global co-operation, nationalisation and state intervention - all in one day' 'UK uses anti-terror laws against Icelandic bank' "Britain 'could be mining landfill for gold in a decade'" 'German law professor who almost stopped the EU: Elite wants world government '
# Headlines dated 2008-10-10, printed one per paragraph.
news1 = news[news['Date'] == '2008-10-10']['News'].tolist()
for headline in news1:
    # Some rows hold the text of a bytes literal (b'...' or b"..."). Drop only
    # that leading "b" so plain-text headlines keep their first character.
    if headline.startswith(("b'", 'b"')):
        headline = headline[1:]
    print(headline)
    print()
'Equipped with knives, sticks and clubs, they all had one purpose: to do harm to Arabs for being Arabs.' 'Asian stock markets prunge over 10 percent on news that the sky is falling' 'UK accuses Iceland of economic terrorism: Seizes assets' "Iran's 'Nuclear Detonators' Are A CIA Fake" 'Yom Kippur: Jews Riot in Acre Over Arab Driving A Car ' 'Police faced off against hundreds of Jewish rioters chanting "death to Arabs" and trying to block the city\'s main thoroughfare' 'Nobel peace price for 2008 to Martti Ahtisaari' 'French President Nicolas Sarkozy: Lets put things clearly. There was a Georgian military aggression. That was a mistake. But the Russian armys reaction was disproportionate' 'Iceland has gone bankrupt. They are likely going to ask the International Monetary Fund for help and ditch the krona.' 'Icelandic Currency (ISK) loses 73% of its value in a single day. Yesterday, 1 USD = 92 ISK. Today, 1 USD = 418 ISK. ' 'Oops! "Jewish People" is a Myth, says Jewish Historian ' "Nature loss 'dwarfs bank crisis'" 'Pakistan in crisis on mission for US funding' 'Banned from Sumo for life for smoking marijuana? Japan even more uptight about pot than US' 'Bulgarian roses will be planted in France' 'Mullah Omar: U.S. Should Withdraw Now Or Meet The Same Fate As The Soviets' 'Congressional leaders Harry Reid and Nancy Pelosi urged President Bush Thursday to call an emergency meeting with the G8 to address international financial instability.' 'Inflammatory Republican rallies raise concerns' 'Rioting between Jews in Arabs has resumed in Akko after a Yom Kippur filled with violence in the racially mixed city' "Two of this week's Nobel Prize winners talk about how the destruction of Hiroshima and Nagasaki changed their lives." 'Meanwhile, shares on the Tehran stock exchange have increased in value by 20% during the year. (Of course, the majority of their eggs are in the oil basket....)' 'Oh My! Whatever Will The Oil Barons & Ministers Do? We need a rally for OPEC!!' 
'"Sources inside the (World ) bank confirm that servers in the institution\'s highly-restricted treasury unit were deeply penetrated with spy software last April."' 'FTSE plunges 440pts in 10 minutes as markets around the world go into freefall again' 'Libya is to withdraw all its assets from Swiss banks, estimated at $7bn'